1 Load Data

# Read the APCRA prospective chemical list. The first column is dropped and
# the remaining fread default names (V2-V4) are replaced with identifier names.
apcra.pro <- fread('./source/chem/apcra_pro.csv')
apcra.pro[, c(1) := NULL]
setnames(
  apcra.pro,
  old = c('V2', 'V3', 'V4'),
  new = c('DTXSID', 'CASRN', 'preferred_name')
)
apcra.pro <- apcra.pro[-1L]  # drop the first row (presumably the file's own header line - confirm)
apcra.pro[, list := 'Pro']   # tag every row as belonging to the prospective list

# Flag chemicals that also appear in the APCRA retrospective case study
# (0 = not present, 1 = present); these will likely be data-rich.
apcra.ret <- as.data.table(
  read.xlsx('./source/chem/Supp_File_2_pod_ratio_master_final.xlsx', sheet = 1)
)
apcra.ret.dtxsids <- apcra.ret[['DTXSID']]
apcra.pro[, apcra.ret := 0]
apcra.pro[DTXSID %in% apcra.ret.dtxsids, apcra.ret := 1]

# Pull ToxVal v9.4 records joined to their chemical and species tables.
# NOTE(review): SELECT * across the three joined tables returns repeated column
# names (dtxsid, species_id and habitat each appear twice in the colnames
# listing further down); column references by name resolve to one of them.
toxval_v9_4 <- as.data.table(dbGetQuery(con, "SELECT * FROM res_toxval_v94.toxval INNER JOIN
                          res_toxval_v94.chemical ON toxval.dtxsid=chemical.dtxsid INNER JOIN
                          res_toxval_v94.species ON toxval.species_id=species.species_id;"))

# Keep only the prospective-list chemicals and cache the extract on disk.
toxval_v9_4 <- toxval_v9_4[dtxsid %in% apcra.pro$DTXSID]
save(toxval_v9_4, file = './source/toxval_v9_4_apcra_pro_full.RData')

2 Refine the toxval data

2.1 Human Health

  • Select qc_status==‘pass’
  • Reduce to studies for human health.
# Restore the cached ToxVal extract and count records per QC outcome.
load('./source/toxval_v9_4_apcra_pro_full.RData')
table(toxval_v9_4[['qc_status']])
## 
##    fail:human_eco not specified     fail:toxval_numeric is null 
##                              37                            2084 
##  fail:toxval_type not specified fail:toxval_units not specified 
##                            1559                             143 
##                            pass 
##                           89652
# Keep only records whose QC status is 'pass'
# (the earlier v9.1 workflow used: toxval_v9_1[qa_status==1]).
toxval.apcra <- toxval_v9_4[qc_status %in% 'pass']

# List the available columns.
names(toxval.apcra)
##  [1] "toxval_id"                         "source_hash"                      
##  [3] "source_table"                      "chemical_id"                      
##  [5] "dtxsid"                            "source"                           
##  [7] "subsource"                         "source_url"                       
##  [9] "subsource_url"                     "details_text"                     
## [11] "priority_id"                       "qc_status"                        
## [13] "risk_assessment_class"             "human_eco"                        
## [15] "toxval_type"                       "toxval_type_original"             
## [17] "toxval_subtype"                    "toxval_subtype_original"          
## [19] "toxval_numeric"                    "toxval_numeric_original"          
## [21] "toxval_numeric_converted"          "toxval_numeric_standard"          
## [23] "toxval_numeric_human"              "toxval_units"                     
## [25] "toxval_units_original"             "toxval_units_converted"           
## [27] "toxval_units_standard"             "toxval_units_human"               
## [29] "toxval_numeric_qualifier"          "toxval_numeric_qualifier_original"
## [31] "study_type"                        "study_type_original"              
## [33] "study_duration_class"              "study_duration_class_original"    
## [35] "study_duration_value"              "study_duration_value_original"    
## [37] "study_duration_units"              "study_duration_units_original"    
## [39] "species_id"                        "species_original"                 
## [41] "strain"                            "strain_original"                  
## [43] "strain_group"                      "habitat"                          
## [45] "sex"                               "sex_original"                     
## [47] "critical_effect"                   "critical_effect_original"         
## [49] "population"                        "population_original"              
## [51] "exposure_route"                    "exposure_route_original"          
## [53] "exposure_method"                   "exposure_method_original"         
## [55] "exposure_form"                     "exposure_form_original"           
## [57] "media"                             "media_original"                   
## [59] "lifestage"                         "lifestage_original"               
## [61] "generation"                        "generation_original"              
## [63] "year"                              "year_original"                    
## [65] "mw"                                "datestamp"                        
## [67] "source_source_id"                  "toxval_uuid"                      
## [69] "toxval_hash"                       "dtxsid"                           
## [71] "casrn"                             "name"                             
## [73] "species_id"                        "common_name"                      
## [75] "latin_name"                        "kingdom"                          
## [77] "phylum_division"                   "subphylum_div"                    
## [79] "superclass"                        "class"                            
## [81] "tax_order"                         "family"                           
## [83] "genus"                             "species"                          
## [85] "subspecies"                        "variety"                          
## [87] "ecotox_group"                      "habitat"
# Preview the first rows, then count records by human_eco category.
head(toxval.apcra, 6)
#table(toxval.apcra$species_supercategory)
table(toxval.apcra[['human_eco']])
## 
##          eco human health 
##        71524        18128
# Restrict to human-health records and count them per risk assessment class.
toxval.apcra <- toxval.apcra[human_eco %in% 'human health']
table(toxval.apcra[['risk_assessment_class']])
## 
##                      acute       air quality standard 
##                       2181                        270 
##                    chronic                   clinical 
##                       4595                          4 
##             clinical study              developmental 
##                          2                       1622 
##             dose selection    drinking water standard 
##                          5                         49 
##             exposure limit                   genetics 
##                         18                          8 
##               genotoxicity                     growth 
##                        152                         17 
##                Hershberger                      human 
##                         35                          1 
##             immunotoxicity                   in vitro 
##                        106                        271 
##                 morphology                  mortality 
##                         22                        143 
##              neurotoxicity   neurotoxicity short-term 
##                        300                          2 
##   neurotoxicity subchronic                      other 
##                          3                         95 
##                   physchem          repeat dose other 
##                         78                         31 
##               reproduction reproduction developmental 
##                       2009                          7 
##                 short-term      soil quality standard 
##                       1894                        185 
##   special toxicology study                 subchronic 
##                         23                       3313 
##               uterotrophic     water quality standard 
##                         89                        598
# Keep the risk assessment classes listed below, then count the remaining
# records per species common name.
toxval.apcra <- toxval.apcra[risk_assessment_class %in% c(
  'chronic', 'developmental', 'Hershberger', 'immunotoxicity',
  'neurotoxicity', 'neurotoxicity short-term', 'neurotoxicity subchronic',
  'repeat dose other', 'reproduction', 'reproduction developmental',
  'short-term', 'subchronic', 'uterotrophic'
)]
table(toxval.apcra[['common_name']])
## 
##                American Mink American Short-Tailed Shrews 
##                           62                            8 
##                    Black Rat                          Cat 
##                           47                            1 
##        Common Redbacked Vole                 Common Shrew 
##                            2                           23 
##                   Cow Family                       Coyote 
##                            6                            1 
##                   Deer Mouse            Desert Cottontail 
##                           58                           34 
##                          Dog                     Dog, Rat 
##                         1342                            2 
##                 Domestic Cat                Domestic Goat 
##                            6                            3 
##               Domestic Sheep          Domesticated Cattle 
##                           66                           16 
##           Eastern Cottontail           European Pine Vole 
##                           15                            1 
##              European Rabbit               Golden Hamster 
##                          117                           22 
##            Grasshopper Mouse             Gray-Tailed Vole 
##                            5                            8 
##                   Guinea Pig                      Hamster 
##                           14                           96 
##                Harvest Mouse                        Human 
##                            1                            5 
##                   Human (RA)           Indian Desert Jird 
##                          546                            5 
##             Little Brown Bat      Long-Tailed Field Mouse 
##                           42                            4 
##                      Mammals                  Meadow Vole 
##                            1                            8 
##                         Mink                       Monkey 
##                            9                           10 
##                Montane Shrew                        Mouse 
##                           34                         2422 
##                   Mouse, Rat                          Pig 
##                           14                            4 
##                    Pine Vole                 Prairie Vole 
##                            6                           11 
##             Pygmy Wood Mouse                       Rabbit 
##                            1                          652 
##                  Rabbit, Rat                          Rat 
##                            2                         8178 
##                      Red Fox                  River Otter 
##                           42                            8 
##                    Root Vole                  Shaw's Jird 
##                            1                            1 
##                        Sheep                Water Buffalo 
##                            5                           14 
##               Water Buffalos           White-Footed Mouse 
##                            5                           11 
##            White-Tailed Deer                    Woodchuck 
##                            8                            1
  • After some inspection it seemed ECOTOX and DOE Wildlife Benchmarks were adding a lot of additional terrestrial species from ecotoxicology.
  • However, removing these sources entirely would also remove ‘Rat’ studies, so the sources themselves should not be omitted.
# Display the records that come from the two sources named above.
toxval.apcra[source %in% c('DOE Wildlife Benchmarks', 'ECOTOX')]
  • Unique listing of species suggests ecotox terrestrial species.
# Distinct species common names, in order of first appearance.
unique(toxval.apcra[['common_name']])
##  [1] "Human (RA)"                   "Human"                       
##  [3] "Rat"                          "Mouse"                       
##  [5] "Rabbit"                       "Dog"                         
##  [7] "Domesticated Cattle"          "Domestic Sheep"              
##  [9] "Rabbit, Rat"                  "European Rabbit"             
## [11] "Mouse, Rat"                   "Deer Mouse"                  
## [13] "Black Rat"                    "Monkey"                      
## [15] "Little Brown Bat"             "American Short-Tailed Shrews"
## [17] "White-Footed Mouse"           "Meadow Vole"                 
## [19] "Mink"                         "Red Fox"                     
## [21] "River Otter"                  "White-Tailed Deer"           
## [23] "Desert Cottontail"            "Montane Shrew"               
## [25] "Water Buffalo"                "Golden Hamster"              
## [27] "Domestic Goat"                "Sheep"                       
## [29] "American Mink"                "Hamster"                     
## [31] "Cow Family"                   "Prairie Vole"                
## [33] "Gray-Tailed Vole"             "Cat"                         
## [35] "Guinea Pig"                   "Indian Desert Jird"          
## [37] "Water Buffalos"               "Shaw's Jird"                 
## [39] "Common Shrew"                 "Mammals"                     
## [41] "Pig"                          "Woodchuck"                   
## [43] "Domestic Cat"                 "Long-Tailed Field Mouse"     
## [45] "Common Redbacked Vole"        "Root Vole"                   
## [47] "European Pine Vole"           "Pygmy Wood Mouse"            
## [49] "Harvest Mouse"                "Eastern Cottontail"          
## [51] "Coyote"                       "Pine Vole"                   
## [53] "Dog, Rat"                     "Grasshopper Mouse"
  • Based on explicitly specifying species, this is what we are left with in the dataset.
# Keep only an explicit allow-list of species common names, then re-count.
toxval.apcra <- toxval.apcra[common_name %in% c(
  'Human (RA)', 'Human',
  'Rat', 'Mouse', 'Rabbit', 'Dog',
  'Rabbit, Rat', 'European Rabbit', 'Mouse, Rat', 'Black Rat',
  'Monkey', 'Hamster', 'Guinea Pig', 'Dog, Rat'
)]

table(toxval.apcra[['common_name']])
## 
##       Black Rat             Dog        Dog, Rat European Rabbit      Guinea Pig 
##              47            1342               2             117              14 
##         Hamster           Human      Human (RA)          Monkey           Mouse 
##              96               5             546              10            2422 
##      Mouse, Rat          Rabbit     Rabbit, Rat             Rat 
##              14             652               2            8178
  • Curious how the human data come to be
  • Looks like these are generally specific PPRTV or RfD values
  • These can be separated out later: they are probably not fit for comparison to PODs because they are values already adjusted for uncertainty.
# Display the records attributed to either of the two human labels.
toxval.apcra[common_name == 'Human' | common_name == 'Human (RA)']

2.2 Repeat exposures

  • How do we want to refine?
  • This case study is about repeat exposures, so drop acute and select repeat dose studies.
# Count records per study type.
table(toxval.apcra[['study_type']])
## 
##                          -                  avoidance 
##                          2                          1 
##                    chronic              developmental 
##                       4266                       1611 
##                Hershberger             immunotoxicity 
##                         35                        101 
##              neurotoxicity   neurotoxicity short-term 
##                        256                          2 
##   neurotoxicity subchronic          repeat dose other 
##                          3                         31 
##               reproduction reproduction developmental 
##                       1968                          7 
##                 short-term                 subchronic 
##                       1839                       3236 
##               uterotrophic 
##                         89
#unique(toxval.apcra$study_type)

# Keep the repeat-exposure study types listed below; this removes the '-' and
# 'avoidance' entries seen in the preceding table.
# NOTE(review): 'noncancer' does not occur in that table, so it currently
# matches no rows.
toxval.apcra <- toxval.apcra[study_type %in% c(
  'chronic', 'short-term', 'subchronic', 'noncancer',
  'developmental', 'repeat dose other', 'reproduction',
  'neurotoxicity', 'immunotoxicity', 'uterotrophic',
  'neurotoxicity short-term', 'Hershberger',
  'reproduction developmental', 'neurotoxicity subchronic'
)]
unique(toxval.apcra[['study_type']])
##  [1] "chronic"                    "short-term"                
##  [3] "subchronic"                 "developmental"             
##  [5] "repeat dose other"          "reproduction"              
##  [7] "neurotoxicity"              "immunotoxicity"            
##  [9] "uterotrophic"               "neurotoxicity short-term"  
## [11] "Hershberger"                "reproduction developmental"
## [13] "neurotoxicity subchronic"

2.3 Units

  • Make sure the toxval_type and units are interpretable for our case study.
  • Include oral exposures
  • Keep the reference dose (RfD) for now, out of interest (can be dropped later for interpretation).

2.3.1 Exposure Route

# Cross-tabulate exposure method (rows) against exposure route (columns).
table(toxval.apcra[, .(exposure_method, exposure_route)])
##                   exposure_route
## exposure_method       - dermal environmental inhalation injection multiple
##   -                 378    131             4         55       468        1
##   aerosol             0      0             0         38         0        0
##   capsule             3      0             0          0         0        0
##   culture             0      0             7          0         0        0
##   diet                0      0             0          0         0        0
##   driniking water     0      0             0          0         0        0
##   drinking water     10      0             0          0         0        0
##   dust                0      0             0          2         0        0
##   feed               43      0             0          0         0        0
##   gavage            241      0             0          0         0        0
##   Gelatin capsules    0      0             0          0         0        0
##   injection           0      0             0          0         0        0
##   media mixture       0      0             4          0         0        0
##   oral                0      0             0          0         0        0
##   topical             0     48             0          0         0        0
##   unspecified         0      0             0          0         0        0
##   vapor               0      0             0         29         0        0
##                   exposure_route
## exposure_method    Not Reported oral subcutaneous
##   -                           8 3005           27
##   aerosol                     0    0            0
##   capsule                     0  378            0
##   culture                     0    0            0
##   diet                        0   75            0
##   driniking water             0    4            0
##   drinking water              0   41            0
##   dust                        0    0            0
##   feed                        0 6558            0
##   gavage                      0 1869            0
##   Gelatin capsules            0    4            0
##   injection                   0    0           10
##   media mixture               0    0            0
##   oral                        0    1            0
##   topical                     0    0            0
##   unspecified                 0    0            2
##   vapor                       0    0            0
  • Sometimes the “-” exposure route is % in diet
# For records whose exposure route is '-', show the originally reported
# exposure method.
table(toxval.apcra[exposure_route == '-', .(exposure_route, exposure_method_original)])
##               exposure_method_original
## exposure_route   - capsule drinking water feed gavage unspecified
##              - 368       3             10   43    241          10
  • Included exposure routes that were oral or ‘-’; the units appeared generally interpretable as oral even when the route was ‘-’
# Keep records whose exposure route is oral or unspecified ('-').
toxval.apcra <- toxval.apcra[exposure_route %in% c('oral', '-')]

# Display the records where both the route and the method are '-'.
toxval.apcra[exposure_route == '-' & exposure_method == '-']

2.3.2 Dose

  • Examine the units on dose.
# Count records per dose unit.
table(toxval.apcra[['toxval_units']])
## 
##             %        % diet         % w/v (mg/kg-day)-1     (mg/m3)-1 
##           104            61             6            42            10 
##     g in diet   g/kg bdwt/d           g/L            mg   mg/100 g bw 
##             2             3            10             4            14 
##     mg/animal        mg/day         mg/kg     mg/kg-day    mg/kg diet 
##             2             7            67         12021            59 
##  mg/kg diet/d     mg/kg/org          mg/L        mg/org    mg/org-day 
##             1             2             6             2            50 
##         ml/kg     mL/kg-day            mM         ng/mL      ppb diet 
##             2             1             8             2            29 
##           ppm    ug/kg bdwt  ug/kg bdwt/d    ug/org-day          uM/g 
##            51             4            11             4            13 
##      uM/kg bw 
##            12
  • First pass filter on dose units
# First-pass filter on dose units: keep only the units listed below, then
# re-count.
# NOTE(review): the entry 'g/ kg bdwt/d' contains a space and does not match
# the 'g/kg bdwt/d' label printed by the preceding units table, so those rows
# are dropped by this filter. If they are meant to be kept, the label needs
# correcting AND a g -> mg conversion must be added to the conversion steps;
# confirm intent before changing.
toxval.apcra <- toxval.apcra[toxval_units %in% c(
  '%', '% diet', '(mg/kg-day)-1', 'g/ kg bdwt/d', 'g/L',
  'mg/kg', 'mg/kg-day', 'mg/kg diet', 'mg/kg diet/d',
  'ppb diet', 'ppm', 'ug/kg bdwt', 'ug/kg bdwt/d'
)]

table(toxval.apcra[['toxval_units']])
## 
##             %        % diet (mg/kg-day)-1           g/L         mg/kg 
##           104            61            42            10            67 
##     mg/kg-day    mg/kg diet  mg/kg diet/d      ppb diet           ppm 
##         12021            59             1            29            51 
##    ug/kg bdwt  ug/kg bdwt/d 
##             4            11
  • Convert all preserved units on dose to mg/kg-bw/day.
  • Check which species have values reported in ppm
# Create working copies of the unit and numeric columns; the conversion steps
# overwrite these and leave toxval_units / toxval_numeric untouched.
toxval.apcra[, `:=`(
  toxval.apcra.unit = toxval_units,
  toxval.apcra.mkd = toxval_numeric
)]

# Species that have values reported in ppm.
table(toxval.apcra[toxval_units == 'ppm', common_name])
## 
## Black Rat   Hamster     Mouse       Rat 
##        27         2         4        18
  • Ensure all toxval_numeric are numeric by examining range
# Drop rows carrying the -999 placeholder value (author's note: the row count
# did not change after the QC-status filtering), then check the value range.
toxval.apcra <- toxval.apcra[toxval_numeric != -999]
range(toxval.apcra[['toxval_numeric']])
## [1] 1e-05 6e+05
# Re-count records per dose unit after removing the placeholder values.
table(toxval.apcra[, .(toxval_units)])
## toxval_units
##             %        % diet (mg/kg-day)-1           g/L         mg/kg 
##           104            61            42            10            67 
##     mg/kg-day    mg/kg diet  mg/kg diet/d      ppb diet           ppm 
##         12021            59             1            29            51 
##    ug/kg bdwt  ug/kg bdwt/d 
##             4            11
  • Convert all units to mg/kg/day
# Convert ug/kg body weight values to mg/kg (divide by 1000) and relabel them
# as mg/kg-day (mkd). Only the ug units are handled here; no g/kg unit is
# converted in this step.
# NOTE(review): 'ug/kg bdwt' carries no per-day suffix but is relabelled as
# mg/kg-day along with 'ug/kg bdwt/d'; confirm that is intended.
toxval.apcra[
  toxval_units %in% c('ug/kg bdwt', 'ug/kg bdwt/d'),
  `:=`(
    toxval.apcra.mkd = toxval_numeric / 1000,
    toxval.apcra.unit = 'mg/kg-day'
  )
]
  • Based on the subset of records with % and % diet units, assume these values are percent in feed, and delete the single Hamster record
# Exposure method by species for the records reported in percent units.
table(toxval.apcra[toxval_units %in% c('%', '% diet'), .(exposure_method, common_name)])
##                common_name
## exposure_method Hamster Mouse Rat
##            -          0    54  94
##            feed       1     3  13
# Percent values are assumed to mean percent in feed.
# 1% in diet = 10,000 ppm, so multiply by 10,000 to reach ppm, then multiply
# by the species-specific factor that converts 1 ppm in diet to mg/kg/day.
toxval.apcra[
  toxval_units %in% c('%', '% diet') & common_name %in% c('Dog', 'dog'),
  toxval.apcra.mkd := (toxval_numeric * 10000) * 0.025
]
toxval.apcra[
  toxval_units %in% c('%', '% diet') & common_name %in% c('Rat', 'rat'),
  toxval.apcra.mkd := (toxval_numeric * 10000) * 0.05
]
toxval.apcra[
  toxval_units %in% c('%', '% diet') & common_name %in% c('Mouse', 'mouse'),
  toxval.apcra.mkd := (toxval_numeric * 10000) * 0.15
]
toxval.apcra[
  toxval_units %in% c('%', '% diet') & common_name %in% c('Rabbit', 'rabbit'),
  toxval.apcra.mkd := (toxval_numeric * 10000) * 0.03
]

# Hamster percent rows are removed rather than converted.
toxval.apcra <- toxval.apcra[
  !(common_name %in% c('Hamster') & toxval_units %in% c('%', '% diet'))
]

# Every remaining percent row is now labelled mg/kg-day.
toxval.apcra[toxval_units %in% c('%', '% diet'), toxval.apcra.unit := 'mg/kg-day']

# Species breakdown for the ppm rows handled next.
# NOTE(review): 'ppb' matches no unit in the data (the label is 'ppb diet'),
# so only ppm rows appear in this table.
table(toxval.apcra[toxval_units %in% c('ppm', 'ppb'), .(toxval_units, common_name)])
##             common_name
## toxval_units Black Rat Hamster Mouse Rat
##          ppm        27       2     4  18
# Convert ppm to mg/kg/day using the per-species factors
# (Rat and Black Rat 0.05, Mouse 0.15, Hamster 0.094).
# Factors kept on record for species with no ppm rows in this data set:
# dog 0.025, rabbit 0.03, guinea pig 0.040.
toxval.apcra[
  toxval_units == 'ppm' & common_name %in% c('Rat', 'Black Rat'),
  toxval.apcra.mkd := toxval_numeric * 0.05
]
toxval.apcra[
  toxval_units == 'ppm' & common_name == 'Mouse',
  toxval.apcra.mkd := toxval_numeric * 0.15
]
toxval.apcra[
  toxval_units == 'ppm' & common_name == 'Hamster',
  toxval.apcra.mkd := toxval_numeric * 0.094
]
toxval.apcra[toxval_units == 'ppm', toxval.apcra.unit := 'mg/kg-day']

# Species breakdown for the 'ppb diet' rows handled next.
table(toxval.apcra[toxval_units == 'ppb diet', .(common_name, toxval_units)])
##            toxval_units
## common_name ppb diet
##       Mouse       20
##       Rat          9
# Convert ppb in diet to mg/kg/day: ppb * 0.001 gives ppm in diet, which is
# then multiplied by the species factor for 1 ppm in diet -> mg/kg/day.
# The Mouse factor is 0.15, the same value used for Mouse in the %, ppm and
# mg/kg diet conversions in this script; the previous 0.015 was a typo that
# under-estimated mouse ppb-diet doses ten-fold.
toxval.apcra[toxval_units == 'ppb diet' & common_name == 'Mouse',
             toxval.apcra.mkd := toxval_numeric * 0.001 * 0.15]
toxval.apcra[toxval_units == 'ppb diet' & common_name == 'Rat',
             toxval.apcra.mkd := toxval_numeric * 0.001 * 0.05]
toxval.apcra[toxval_units == 'ppb diet', toxval.apcra.unit := 'mg/kg-day']

# Species breakdown for the mg/kg diet rows handled next.
table(toxval.apcra[toxval_units %in% c('mg/kg diet', 'mg/kg diet/d'),
                   c('toxval_units', 'common_name')])
##               common_name
## toxval_units   Dog Hamster Mouse Rat
##   mg/kg diet     1      13    24  21
##   mg/kg diet/d   0       0     0   1
# Plain 'mg/kg' is left unconverted: it is hard to know whether it refers to
# diet or body weight; on inspection it mostly seems to be mg/kg body weight.
# (inspect with: toxval.apcra[toxval_units=='mg/kg'])

# mg/kg diet -> mg/kg/day, treating 1 mg/kg diet as ~1 ppm in diet.
toxval.apcra[
  toxval_units == 'mg/kg diet' & common_name == 'Dog',
  toxval.apcra.mkd := toxval_numeric * 0.025
]
toxval.apcra[
  toxval_units %in% c('mg/kg diet', 'mg/kg diet/d') & common_name == 'Rat',
  toxval.apcra.mkd := toxval_numeric * 0.05
]
toxval.apcra[
  toxval_units == 'mg/kg diet' & common_name == 'Mouse',
  toxval.apcra.mkd := toxval_numeric * 0.15
]
toxval.apcra[
  toxval_units == 'mg/kg diet' & common_name == 'Hamster',
  toxval.apcra.mkd := toxval_numeric * 0.094
]
toxval.apcra[
  toxval_units %in% c('mg/kg diet', 'mg/kg diet/d'),
  toxval.apcra.unit := 'mg/kg-day'
]

# g/L -> mg/kg/day: x1000 for g -> mg, then the 0.05 Rat factor.
# NOTE(review): only Rat rows are rescaled, yet every g/L row is relabelled
# mg/kg-day, and the species of the g/L rows is not tabulated in this script;
# confirm they are all Rat. Also confirm that 0.05 (the factor used for Rat
# dietary ppm elsewhere) is appropriate for a g/L concentration.
toxval.apcra[
  toxval_units == 'g/L' & common_name == 'Rat',
  toxval.apcra.mkd := toxval_numeric * 1000 * 0.05
]
toxval.apcra[toxval_units == 'g/L', toxval.apcra.unit := 'mg/kg-day']

# Units remaining after all conversions.
table(toxval.apcra[['toxval.apcra.unit']])
## 
## (mg/kg-day)-1         mg/kg     mg/kg-day 
##            42            67         12350

2.3.3 ToxVal type

# Count records per effect-level type.
table(toxval.apcra[['toxval_type']])
## 
##                        BMD                       BMDL 
##                         38                         41 
##              BMDL (0.5 SD)                  BMDL (05) 
##                          2                          2 
##                  BMDL (10)        cancer slope factor 
##                          3                         43 
##                        ED3                       ED30 
##                          1                          2 
##                       ED50                       HNEL 
##                         14                         88 
##                       IC50                       LC50 
##                          1                         18 
##                        LD0                      LD100 
##                          3                          3 
##                       LD16                       LD50 
##                          3                         17 
##                       LD84                        LEL 
##                          3                       2277 
##                      LOAEC                      LOAEL 
##                          1                       2273 
##                LOAEL (HED)                       LOEC 
##                          3                         14 
##                       LOEL                        MRL 
##                        982                         45 
##                        NEL                      NOAEC 
##                       2003                          2 
##                      NOAEL                       NOEC 
##                       2914                         10 
##                       NOEL                        NTD 
##                       1342                          1 
##                        RfD    RfD (screening chronic) 
##                        279                          1 
## RfD (screening subchronic)                      SRfDo 
##                          1                         24 
##                        T25                       TDLo 
##                          2                          3
#unique(toxval.apcra$toxval_type)
  • Keep only the following effect-level types (toxval_type)
# Effect-level types (toxval_type values) retained for the analysis.
# 'RfD' and 'cancer slope factor' are kept at this stage only so they can be
# inspected; they are removed in a later step.
# NOTE(review): 'BMDL10' and 'NOAEL ' (trailing space) do not appear in the
# rendered toxval_type tabulations, which list 'BMDL (10)' and 'NOAEL' --
# confirm whether 'BMDL (10)' was the intended entry.
keep.toxval.types <- c(
  'BMD',
  'BMDL',
  'BMDL10',
  'HNEL',
  'LEL',
  'LOAEL',
  'NEL',
  'NOAEL',
  'NOAEL ',
  'NOEL',
  # remove these later but want to see them first
  'RfD',
  'cancer slope factor'
)
toxval.apcra <- toxval.apcra[toxval_type %in% keep.toxval.types]

# Tabulate the types that remain after the filter.
table(toxval.apcra$toxval_type)
## 
##                 BMD                BMDL cancer slope factor                HNEL 
##                  38                  41                  43                  88 
##                 LEL               LOAEL                 NEL               NOAEL 
##                2277                2273                2003                2914 
##                NOEL                 RfD 
##                1342                 279

2.4 Examine doses

  • A histogram of the values suggests that most fall within 1-1000 mg/kg-bw/day.
  • We likely want to examine the extreme values.
  • Most of the extreme values appear to be RfDs (corrected with UFs).
# Histogram (50 bins) of the log10-transformed toxval.apcra.mkd values for
# every retained toxval_type, including RfDs and cancer slope factors.
ggplot(
  data = toxval.apcra[, .(toxval.apcra.mkd)],
  mapping = aes(x = log10(toxval.apcra.mkd))
) +
  geom_histogram(bins = 50) +
  theme_bw() +
  labs(x = "Toxval log10-mg/kg/day values")

  • Remove RfDs and look at distribution of NELs/NOAELs/LOAELs/LELs
# Drop the reference-value style records (RfD, cancer slope factor) so that
# only the remaining effect-level types are left.
excluded.types <- c('RfD', 'cancer slope factor')
toxval.apcra.norfds <- toxval.apcra[!(toxval_type %in% excluded.types)]

# Histogram (50 bins) of the remaining log10-transformed values.
ggplot(
  data = toxval.apcra.norfds[, .(toxval.apcra.mkd)],
  mapping = aes(x = log10(toxval.apcra.mkd))
) +
  geom_histogram(bins = 50) +
  theme_bw()

  • 262 observations are less than 0.1 mg/kg/day
  • 51 observations are less than 0.01 mg/kg/day for 9 dtxsids
  • 165 DTXSIDs remain in the dataset after removing RfDs and cancer slope factors
# Rows whose value is below 0.01, kept for closer inspection.
lo.values <- toxval.apcra.norfds[toxval.apcra.mkd < 0.01, ]

# Number of distinct chemicals left once RfDs/cancer slope factors are removed.
uniqueN(toxval.apcra.norfds$dtxsid) #165 dtxsids
## [1] 165
  • One chemical in the low observations appears to have one observation that is much lower than the others for the chemical.
# Print the low-value rows (toxval.apcra.mkd < 0.01) that belong to DTXSID6020062.
lo.values[dtxsid=='DTXSID6020062']
  • Look at distribution just for this one chemical…DTXSID6020062
  • the lowest values are all the same, seem to come from same study, and are extremely low compared to the rest.
# Histogram (50 bins) of the log10-transformed values for DTXSID6020062 only.
ggplot(
  data = toxval.apcra.norfds[dtxsid == 'DTXSID6020062', .(toxval.apcra.mkd)],
  mapping = aes(x = log10(toxval.apcra.mkd))
) +
  geom_histogram(bins = 50) +
  theme_bw()

3 Compare the new ToxVal PODs

3.1 Create summary

  • Create the summary for comparison to the previous retrospective chemicals.
  • Retrospective case study used a refined set from dev_toxval_v5.
  • Adding in summary quantile values from 5-30%.
# One row per chemical (dtxsid, casrn, name) with summary statistics of
# toxval.apcra.mkd: min/max, the 5th-30th percentiles in 5-point steps,
# median, mean, standard deviation and the number of records.
toxval.apcra.summary <- unique(
  toxval.apcra.norfds[
    ,
    {
      mkd <- toxval.apcra.mkd
      pctl <- function(p) quantile(mkd, probs = p)
      list(
        min.toxval.numeric = min(mkd),
        p5.toxval.numeric = pctl(0.05),
        p10.toxval.numeric = pctl(0.10),
        p15.toxval.numeric = pctl(0.15),
        p20.toxval.numeric = pctl(0.20),
        p25.toxval.numeric = pctl(0.25),
        p30.toxval.numeric = pctl(0.30),
        max.toxval.numeric = max(mkd),
        median.toxval.numeric = median(mkd),
        mean.toxval.numeric = mean(mkd),
        stdev.toxval.numeric = sd(mkd),
        number.toxval.numeric = .N
      )
    },
    by = .(dtxsid, casrn, name)
  ]
)
  • Create separate summary for subchronic only.
# Count records per risk_assessment_class (the field used below to pick the
# subchronic subset).
table(toxval.apcra.norfds$risk_assessment_class)
## 
##                    chronic              developmental 
##                       3687                       1529 
##                Hershberger             immunotoxicity 
##                          7                         38 
##              neurotoxicity   neurotoxicity short-term 
##                         98                          2 
##   neurotoxicity subchronic          repeat dose other 
##                          3                         28 
##               reproduction reproduction developmental 
##                       1862                          7 
##                 short-term                 subchronic 
##                        877                       2801 
##               uterotrophic 
##                         37
# updating from study_duration_class to risk_assessment_class increases the number of chemicals with subchronic PODs from 9 to 160

# Same per-chemical summary statistics, restricted to records whose
# risk_assessment_class is subchronic, repeat dose other, or short-term.
# Column names carry a `.sub` suffix to distinguish them from the overall summary.
subchronic.classes <- c('subchronic', 'repeat dose other', 'short-term')
toxval.apcra.summary.subchronic <- unique(
  toxval.apcra.norfds[
    risk_assessment_class %in% subchronic.classes,
    {
      mkd <- toxval.apcra.mkd
      pctl <- function(p) quantile(mkd, probs = p)
      list(
        min.toxval.numeric.sub = min(mkd),
        p5.toxval.numeric.sub = pctl(0.05),
        p10.toxval.numeric.sub = pctl(0.10),
        p15.toxval.numeric.sub = pctl(0.15),
        p20.toxval.numeric.sub = pctl(0.20),
        p25.toxval.numeric.sub = pctl(0.25),
        p30.toxval.numeric.sub = pctl(0.30),
        max.toxval.numeric.sub = max(mkd),
        median.toxval.numeric.sub = median(mkd),
        mean.toxval.numeric.sub = mean(mkd),
        stdev.toxval.numeric.sub = sd(mkd),
        number.toxval.numeric.sub = .N
      )
    },
    by = .(dtxsid, casrn, name)
  ]
)
  • APCRA retrospective supp file 2 values
  • Use the 5th percentile PODs for comparison
  • 96 substances to compare
# Attach the retrospective 5th percentile POD to each prospective chemical;
# chemicals absent from apcra.ret get NA.
toxval.apcra.summary$apcra.ret.5p.POD <- apcra.ret$p5.POD[match(toxval.apcra.summary$dtxsid,
                                                                apcra.ret$DTXSID)]

# log10 difference: new 5th percentile minus retrospective 5th percentile.
toxval.apcra.summary[, diff := as.numeric(log10(p5.toxval.numeric) - log10(apcra.ret.5p.POD)) ]

# Chemicals whose new POD is lower by at least 0.5 log10 units are flagged.
# A single cutoff drives both the plot label and `big.diffs`, so the labelled
# set and the set examined afterwards agree, including at diff == -0.5.
big.diff.cutoff <- -0.5
toxval.apcra.summary[, label := ifelse(!is.na(diff) & diff <= big.diff.cutoff, name, "")]

big.diffs <- toxval.apcra.summary[diff <= big.diff.cutoff, dtxsid]

# Number of chemicals with a retrospective value available for comparison.
length(unique(toxval.apcra.summary[!is.na(diff)]$dtxsid))
## [1] 96

3.2 Comparison of overall POD to retrospective case study POD

#library(ggrepel)

# Scatter of retrospective (x) vs prospective (y) 5th percentile PODs on
# log10 axes, with the identity line and dashed lines at +/- 0.5 log10 units.
# Points carry the text held in the `label` column.
ggplot(
  toxval.apcra.summary,
  aes(x = apcra.ret.5p.POD, y = p5.toxval.numeric)
) +
  geom_point(size = 1) +
  geom_text_repel(aes(label = label)) +
  scale_x_log10(
    limits = c(10^-3, 10^5),
    breaks = scales::trans_breaks("log10", function(x) 10^x),
    labels = scales::trans_format("log10", scales::math_format(10^.x))
  ) +
  scale_y_log10(
    limits = c(10^-3, 10^5),
    breaks = scales::trans_breaks("log10", function(x) 10^x),
    labels = scales::trans_format("log10", scales::math_format(10^.x))
  ) +
  geom_abline(slope = 1, intercept = 0) +
  geom_abline(slope = 1, intercept = 0.5, linetype = "dashed") +
  geom_abline(slope = 1, intercept = -0.5, linetype = "dashed") +
  theme_bw() +
  theme(
    axis.title = element_text(size = 14),
    axis.text = element_text(size = 12)
  ) +
  labs(
    x = '5th percentile APCRA retrospective POD',
    y = '5th percentile APCRA prospective POD'
  )

  • For these substances, the new ToxVal 5th percentile POD was lower by at least 0.5 log10-mg/kg/day.
  • None of them seem quite as egregious as the one above - no separations of 3-4 orders of magnitude between lowest values and next lowest.

# Records for the chemicals listed in big.diffs, one histogram panel per
# chemical name (log10 values on x, log10-scaled counts on y).
diffs <- toxval.apcra.norfds[
  dtxsid %in% big.diffs,
  .(dtxsid, name, toxval.apcra.mkd)
]
ggplot(diffs, aes(x = log10(toxval.apcra.mkd))) +
  geom_histogram(bins = 50) +
  scale_y_log10() +
  facet_wrap(vars(name)) +
  theme_bw()

3.3 Comparison of different quantiles

# Reshape the summary to long form: one row per chemical and percentile
# column, with the percentile column name in `toxval.p` and its value in `value`.
toxval.summary.long <- melt.data.table(
  toxval.apcra.summary,
  id.vars = c('dtxsid', 'casrn', 'name', 'apcra.ret.5p.POD'),
  measure.vars = paste0('p', c(5, 10, 15, 20, 25, 30), '.toxval.numeric'),
  variable.name = 'toxval.p'
)

# Replace both numeric columns with their log10 values.
col.num <- c("value", "apcra.ret.5p.POD")
toxval.summary.long <- toxval.summary.long[
  ,
  (col.num) := lapply(.SD, log10),
  .SDcols = col.num
]
head(toxval.summary.long)
# Percentile columns drawn in the ECDF, in legend order.
ecdf.quantile.cols <- c('p5.toxval.numeric',
                        'p10.toxval.numeric',
                        'p15.toxval.numeric',
                        'p20.toxval.numeric',
                        'p25.toxval.numeric',
                        'p30.toxval.numeric')

# Empirical cumulative distribution of the per-chemical POD percentiles
# (`value` column of toxval.summary.long), one step curve per percentile,
# with a log10-scaled cumulative-frequency axis and red reference lines.
fig.toxval.ecdf <- ggplot(toxval.summary.long[toxval.p %in% ecdf.quantile.cols],
                          aes(value, color=toxval.p))+
  stat_ecdf(geom='step', size=1.5)+
  scale_y_continuous(trans = 'log10',
                     breaks= c(0.01, 0.1,0.2,0.3,0.4,0.5,0.75, 1))+
  ylab("Cumulative Frequency") +
  xlab('log10 POD quantile')+
  theme_bw() +
  theme(
    axis.line = element_line(colour = "black"),
    axis.text = element_text(size=10),
    axis.title = element_text(size=12, face='bold'))+
  theme(axis.text.y = element_text(family = "sans", face = "bold", size=12))+
  theme(legend.position="right", legend.title=element_blank())+
  scale_x_continuous(breaks=seq(-5,10,1)) +
  coord_cartesian(xlim = c(-5, 10)) +
  #scale_color_viridis(discrete=TRUE, name='Ratio Type')+
  scale_colour_manual(breaks=ecdf.quantile.cols,
                      values=c("#440154FF", "#3B528BFF", "#21908CFF", "#5DC863FF", "#FDE725FF", 'darkmagenta'),
                      # Must be the named `labels=` argument: a bare
                      # `labels(...)` calls base::labels() and the legend
                      # text is never applied.
                      labels=c('5th %-ile','10th %-ile','15th %-ile',
                               '20th %-ile','25th %-ile','30th %-ile'))+
  geom_vline(xintercept=-2, lty='dashed', color='red')+
  geom_vline(xintercept=2, lty='dashed', color='red')+
  geom_vline(xintercept=0, color='red')+
  geom_hline(yintercept=0.90, lty='dashed', color='red')

fig.toxval.ecdf

# Violin plot of the per-chemical POD percentiles (`value` column of
# toxval.summary.long), one violin per percentile column, with quartile
# lines drawn in white and a viridis fill.
fig.toxval.violin <- ggplot(
  toxval.summary.long[toxval.p %in% c('p5.toxval.numeric',
                                      'p10.toxval.numeric',
                                      'p15.toxval.numeric',
                                      'p20.toxval.numeric',
                                      'p25.toxval.numeric',
                                      'p30.toxval.numeric')],
  aes(x = toxval.p, y = value, fill = toxval.p)
) +
  geom_violin(draw_quantiles = c(0.25, 0.5, 0.75), color = 'white', trim = FALSE) +
  #geom_boxplot(width=0.1, color='white')+
  scale_y_continuous(breaks = seq(-3, 2, 0.5)) +
  scale_x_discrete(
    labels = c(
      'p5.toxval.numeric' = '5th',
      'p10.toxval.numeric' = '10th',
      'p15.toxval.numeric' = '15th',
      'p20.toxval.numeric' = '20th',
      'p25.toxval.numeric' = '25th',
      'p30.toxval.numeric' = '30th'
    )
  ) +
  scale_fill_manual(
    values = viridis(6),
    breaks = c('p5.toxval.numeric',
               'p10.toxval.numeric',
               'p15.toxval.numeric',
               'p20.toxval.numeric',
               'p25.toxval.numeric',
               'p30.toxval.numeric'),
    labels = c('5th', '10th', '15th', '20th', '25th', '30th')
  ) +
  labs(
    x = 'Percentile',
    y = "ToxVal POD Value, log10-mg/kg/day"
  ) +
  theme_bw() +
  theme(
    axis.line = element_line(colour = "black"),
    axis.text = element_text(size = 10),
    axis.title = element_text(size = 12, face = 'bold'),
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = "right",
    legend.title = element_blank()
  )

fig.toxval.violin

# Descriptive statistics of `value` in toxval.summary.long for each
# percentile column: number of rows, median and interquartile range.
wilcox <- group_by(toxval.summary.long, toxval.p) %>%
  summarise(
    count = n(),
    median = median(value, na.rm = TRUE),
    IQR = IQR(value, na.rm = TRUE)
  )
wilcox

# Paired two-sided Wilcoxon signed rank test of the per-chemical 5th vs 10th
# percentile values taken from toxval.apcra.summary.
# The vectors are passed directly: the default method has no `data`
# argument, and `alternative` is spelled out rather than partially matched.
wilcox.test(toxval.apcra.summary$p5.toxval.numeric,
            toxval.apcra.summary$p10.toxval.numeric,
            mu=0, alternative='two.sided', paired=TRUE, conf.int=TRUE, conf.level=0.95)
## 
##  Wilcoxon signed rank test with continuity correction
## 
## data:  toxval.apcra.summary$p5.toxval.numeric and toxval.apcra.summary$p10.toxval.numeric
## V = 0, p-value < 2.2e-16
## alternative hypothesis: true location shift is not equal to 0
## 95 percent confidence interval:
##  -10.400507  -3.360246
## sample estimates:
## (pseudo)median 
##       -5.33543
# Histogram of all toxval.apcra.mkd values (RfDs and cancer slope factors
# excluded) on a log10 x axis limited to 10^-4 .. 10^4, default binning.
fig.toxval.distrib <- ggplot() +
  geom_histogram(
    data = toxval.apcra.norfds[, .(dtxsid, toxval.apcra.mkd)],
    mapping = aes(x = toxval.apcra.mkd)
  ) +
  scale_x_log10(
    limits = c(10^-4, 10^4),
    breaks = scales::trans_breaks("log10", function(x) 10^x),
    labels = scales::trans_format("log10", scales::math_format(10^.x))
  ) +
  labs(
    x = 'ToxVal Numeric POD (log10-mg/kg/day)',
    y = 'Frequency'
  ) +
  theme_bw() +
  theme(
    axis.text.x = element_text(size = 12, angle = 45, hjust = 1),
    axis.text.y = element_text(size = 12),
    axis.title = element_text(size = 14),
    plot.title = element_text(hjust = 0.5)
  )

fig.toxval.distrib
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Write the two-panel figure (A: distribution histogram, B: violin plot) to a
# dated TIFF under output/.
file.dir <- 'output/'
# No leading slash on the file name: file.dir already ends in '/', so the
# joined path no longer contains a doubled separator.
file.name <- paste0('SuppFig_ToxVal_percentiles_', Sys.Date(), '.tiff')
file.path <- paste0(file.dir, file.name)
dir.create(path=file.dir, showWarnings = FALSE, recursive = TRUE)
tiff(file.path, width=8, height=5, units='in', res=450)
# Explicit print() so the grid is drawn on the TIFF device even when the
# script is run via source(), where top-level values are not auto-printed.
print(plot_grid(fig.toxval.distrib, fig.toxval.violin, ncol=2, labels=c("A", "B"), label_size = 14))
dev.off()

4 Reproducibility

4.1 Write files

# Export the overall summary, the subchronic-only summary and the underlying
# records: one workbook sheet per table, plus an RData file holding the same
# three objects.
list_data <- lapply(
  list(
    "toxval.apcra.pro.summary" = toxval.apcra.summary,
    "toxval.apcra.pro.summ.SUBonly" = toxval.apcra.summary.subchronic,
    "toxval.apcra.pro.all" = toxval.apcra.norfds
  ),
  as.data.frame
)

write.xlsx(list_data, './source/apcra_pro_toxval_v9_4_PODs.xlsx')
save(
  list = c('toxval.apcra.norfds',
           'toxval.apcra.summary',
           'toxval.apcra.summary.subchronic'),
  file = './source/apcra_pro_toxval_v9_4_PODs.RData'
)

4.2 Rsession

# Record the R version, platform, locale and package versions used for this run.
print(sessionInfo())
## R version 4.2.2 (2022-10-31 ucrt)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 22621)
## 
## Matrix products: default
## 
## locale:
## [1] LC_COLLATE=English_United States.utf8 
## [2] LC_CTYPE=English_United States.utf8   
## [3] LC_MONETARY=English_United States.utf8
## [4] LC_NUMERIC=C                          
## [5] LC_TIME=English_United States.utf8    
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] viridis_0.6.2        viridisLite_0.4.1    tcpl_3.1.0          
##  [4] tidyr_1.3.0          RMySQL_0.10.25       DBI_1.2.2           
##  [7] randomForest_4.7-1.1 plotly_4.10.1        openxlsx_4.2.5.2    
## [10] jtools_2.2.1         kableExtra_1.3.4     httk_2.3.0          
## [13] gplots_3.1.3         ggstance_0.3.6       ggrepel_0.9.3       
## [16] DT_0.28              dplyr_1.1.1          DescTools_0.99.48   
## [19] data.table_1.14.8    cowplot_1.1.1        caret_6.0-94        
## [22] lattice_0.21-8       ggplot2_3.4.2       
## 
## loaded via a namespace (and not attached):
##   [1] readxl_1.4.2         systemfonts_1.0.4    plyr_1.8.8          
##   [4] lazyeval_0.2.2       splines_4.2.2        listenv_0.9.0       
##   [7] digest_0.6.31        foreach_1.5.2        htmltools_0.5.8.1   
##  [10] fansi_1.0.4          magrittr_2.0.3       memoise_2.0.1       
##  [13] recipes_1.0.5        globals_0.16.2       gower_1.0.1         
##  [16] svglite_2.1.1        hardhat_1.3.0        timechange_0.2.0    
##  [19] colorspace_2.1-0     blob_1.2.4           rvest_1.0.3         
##  [22] mitools_2.4          rbibutils_2.2.13     xfun_0.43           
##  [25] crayon_1.5.2         jsonlite_1.8.4       Exact_3.2           
##  [28] survival_3.5-5       iterators_1.0.14     glue_1.6.2          
##  [31] gtable_0.3.4         ipred_0.9-14         webshot_0.5.4       
##  [34] future.apply_1.10.0  scales_1.3.0         mvtnorm_1.1-3       
##  [37] Rcpp_1.0.10          tcplfit2_0.1.6       bit_4.0.5           
##  [40] proxy_0.4-27         deSolve_1.35         sqldf_0.4-11        
##  [43] stats4_4.2.2         lava_1.7.2.1         survey_4.1-1        
##  [46] prodlim_2023.03.31   htmlwidgets_1.6.4    httr_1.4.7          
##  [49] RColorBrewer_1.1-3   farver_2.1.1         pkgconfig_2.0.3     
##  [52] nnet_7.3-18          sass_0.4.9           utf8_1.2.3          
##  [55] RMariaDB_1.2.2       labeling_0.4.3       tidyselect_1.2.1    
##  [58] rlang_1.1.0          reshape2_1.4.4       munsell_0.5.1       
##  [61] cellranger_1.1.0     tools_4.2.2          cachem_1.0.7        
##  [64] cli_3.6.1            gsubfn_0.7           generics_0.1.3      
##  [67] RSQLite_2.3.1        evaluate_0.23        stringr_1.5.1       
##  [70] fastmap_1.1.1        yaml_2.3.7           ModelMetrics_1.2.2.2
##  [73] knitr_1.46           bit64_4.0.5          zip_2.2.2           
##  [76] pander_0.6.5         caTools_1.18.2       purrr_1.0.1         
##  [79] rootSolve_1.8.2.3    future_1.32.0        nlme_3.1-162        
##  [82] xml2_1.3.3           compiler_4.2.2       rstudioapi_0.14     
##  [85] e1071_1.7-13         tibble_3.2.1         bslib_0.7.0         
##  [88] stringi_1.7.12       highr_0.10           Matrix_1.5-4        
##  [91] vctrs_0.6.1          msm_1.7              pillar_1.9.0        
##  [94] lifecycle_1.0.4      Rdpack_2.4           jquerylib_0.1.4     
##  [97] bitops_1.0-7         lmom_2.9             R6_2.5.1            
## [100] KernSmooth_2.23-20   gridExtra_2.3        parallelly_1.35.0   
## [103] gld_2.6.6            codetools_0.2-19     boot_1.3-28.1       
## [106] MASS_7.3-58.3        gtools_3.9.4         chron_2.3-60        
## [109] proto_1.0.0          withr_3.0.0          expm_0.999-7        
## [112] parallel_4.2.2       hms_1.1.3            grid_4.2.2          
## [115] rpart_4.1.19         timeDate_4022.108    class_7.3-21        
## [118] rmarkdown_2.26       pROC_1.18.0          numDeriv_2016.8-1.1 
## [121] lubridate_1.9.2